GitHub Repository: https://github.com/sastoudt/stat248project_BayDeltaTS
setwd("~/Desktop/sfei")
sfei<-read.csv("sfeiPlusDates.csv",stringsAsFactors=F)
names(sfei)
## [1] "Date" "Station" "Longitude" "Latitude" "Depth"
## [6] "nh4" "bod" "cl" "clt" "chl"
## [11] "EC" "tkn" "no3" "no2" "no23"
## [16] "doc" "toc" "don" "ton" "po4"
## [21] "do" "pH" "pheo" "tp" "secchi"
## [26] "sio2" "tds" "tss" "vss" "temp"
## [31] "turb" "sal" "do_per" "din" "tn"
## [36] "date_dec" "doy"
require(ggplot2)
## Loading required package: ggplot2
sub=subset(sfei,Station%in%c("D10","D12","D4","D22","D26")) ## middle/most connected region of interest
## EDA to find out which station/variable combos have most complete records
ggplot(sub, aes(x=date_dec, y=din, colour=as.factor(Station)))+geom_line()+xlim(2000,2015) ## care about recent past
## Warning: Removed 924 rows containing missing values (geom_path).
##percent
percentMissingChl=percentMissingDo=percentMissingPheo=percentMissingTemp=percentMissingSal=c()
perStation=unique(sfei$Station)
for(i in 1:length(perStation)){
data=subset(sfei,date_dec>2000 &Station==perStation[i])
percentMissingChl<-c(percentMissingChl,length(which(is.na(data$chl)))/nrow(data))
percentMissingDo<-c(percentMissingDo,length(which(is.na(data$do)))/nrow(data))
percentMissingPheo<-c(percentMissingPheo,length(which(is.na(data$pheo)))/nrow(data))
percentMissingTemp<-c(percentMissingTemp,length(which(is.na(data$temp)))/nrow(data))
percentMissingSal<-c(percentMissingSal,length(which(is.na(data$sal)))/nrow(data))
## chl
## do
## pheo
## temp
## sal
}
percentMissingChl[which(perStation%in%c("D10","D12","D4","D22","D26"))]
## [1] 0 0 0 0 0
percentMissingDo[which(perStation%in%c("D10","D12","D4","D22","D26"))]
## [1] 0.05232558 0.05202312 0.04093567 0.01149425 0.01149425
percentMissingPheo[which(perStation%in%c("D10","D12","D4","D22","D26"))]
## [1] 0 0 0 0 0
percentMissingTemp[which(perStation%in%c("D10","D12","D4","D22","D26"))]
## [1] 0.03488372 0.03468208 0.01754386 0.01149425 0.01149425
percentMissingSal[which(perStation%in%c("D10","D12","D4","D22","D26"))]
## [1] 0.04069767 0.04046243 0.01754386 0.01149425 0.01149425
## counts
percentMissingChl=percentMissingDo=percentMissingPheo=percentMissingTemp=percentMissingSal=c()
perStation=c("D10","D12","D4","D22","D26")
for(i in 1:length(perStation)){
data=subset(sfei,date_dec>2000 &Station==perStation[i])
percentMissingChl<-c(percentMissingChl,length(which(is.na(data$chl))))
percentMissingDo<-c(percentMissingDo,length(which(is.na(data$do))))
percentMissingPheo<-c(percentMissingPheo,length(which(is.na(data$pheo))))
percentMissingTemp<-c(percentMissingTemp,length(which(is.na(data$temp))))
percentMissingSal<-c(percentMissingSal,length(which(is.na(data$sal))))
## chl
## do
## pheo
## temp
## sal
}
percentMissingDo
## [1] 9 9 2 7 2
percentMissingTemp
## [1] 6 6 2 3 2
percentMissingSal
## [1] 7 7 2 3 2
## any back to back missing values?
## avg value before and after missing values
stationUse=c("D10","D12","D4","D22","D26")
for(i in 1:length(stationUse)){
data=subset(sfei,date_dec>2000 &Station==stationUse[i])
print(sum(diff(which(is.na(data$do)))==1))
print(sum(diff(which(is.na(data$temp)))==1))
print(sum(diff(which(is.na(data$sal)))==1))
}
## [1] 1
## [1] 1
## [1] 2
## [1] 2
## [1] 1
## [1] 2
## [1] 1
## [1] 1
## [1] 1
## [1] 2
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## missingness same for all variables within a station?
## often but not always
stationUse=c("D10","D12","D4","D22","D26")
for(i in 1:length(stationUse)){
data=subset(sfei,date_dec>2000 &Station==stationUse[i])
print(intersect(which(is.na(data$do)),which(is.na(data$temp))))
print(intersect(which(is.na(data$temp)),which(is.na(data$sal))))
}
## [1] 6 8 15 16 23
## [1] 6 8 15 16 23 167
## [1] 4 9 16 17 163 169
## [1] 4 9 16 17 163 169
## [1] 169 170
## [1] 169 170
## [1] 15 166 167
## [1] 15 166 167
## [1] 169 170
## [1] 169 170
stationUse=c("D10","D12","D4","D22","D26")
dataBeforeImputation=subset(sfei,date_dec>=2000 & Station%in%stationUse)
nrow(dataBeforeImputation) ## 864
## [1] 864
length(which(is.na(dataBeforeImputation$chl))) ## 0
## [1] 0
length(which(is.na(dataBeforeImputation$pheo))) ## 0
## [1] 0
length(which(is.na(dataBeforeImputation$do))) ## 29
## [1] 29
length(which(is.na(dataBeforeImputation$sal))) ## 21
## [1] 21
length(which(is.na(dataBeforeImputation$temp))) ## 19
## [1] 19
sfei=dataBeforeImputation
sfei$yr=as.numeric(as.character(format(as.Date(sfei$Date), "%Y")))
sfei$mon=as.numeric(as.character(format(as.Date(sfei$Date),"%m")))
sfei=sfei[,c("Date","Station","Longitude","Latitude","chl","do","pheo","sal","temp","date_dec","doy","yr","mon")]
yrSeq=seq(2000,2015,by=1)
monSeq=seq(1,12,by=1)
dataTimeFull=as.data.frame(expand.grid(yrSeq,monSeq))
names(dataTimeFull)=c("yr","mon")
D10<-subset(sfei,Station=="D10")
D10f=merge(dataTimeFull,D10,by.x=c("yr","mon"),by.y=c("yr","mon"),all.x=T)
D12<-subset(sfei,Station=="D12")
D12f=merge(dataTimeFull,D12,by.x=c("yr","mon"),by.y=c("yr","mon"),all.x=T)
D22<-subset(sfei,Station=="D22")
D22f=merge(dataTimeFull,D22,by.x=c("yr","mon"),by.y=c("yr","mon"),all.x=T)
D26<-subset(sfei,Station=="D26")
D26f=merge(dataTimeFull,D26,by.x=c("yr","mon"),by.y=c("yr","mon"),all.x=T)
D4<-subset(sfei,Station=="D4")
D4f=merge(dataTimeFull,D4,by.x=c("yr","mon"),by.y=c("yr","mon"),all.x=T)
## now need to fill in NAs
D10f[which(is.na(D10f$Date)),"Date"]=paste(D10f[which(is.na(D10f$Date)),"yr"],D10f[which(is.na(D10f$Date)),"mon"],
"01",sep="-" )
D10f$Date=as.Date(D10f$Date)
D10f[which(is.na(D10f$Station)),"Station"]="D10"
D10f[which(is.na(D10f$Longitude)),"Longitude"]=D10f$Longitude[1]
D10f[which(is.na(D10f$Latitude)),"Latitude"]=D10f$Latitude[1]
require(lubridate)
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
require(WRTDStidal)
## Loading required package: WRTDStidal
D10f[which(is.na(D10f$date_dec)),"date_dec"]=as.numeric(dec_time(D10f[which(is.na(D10f$date_dec)),"Date"])$dec_time)
D10f[which(is.na(D10f$doy)),"doy"]=as.numeric(strftime(D10f[which(is.na(D10f$doy)),"Date"], format = "%j"))
D12f[which(is.na(D12f$Date)),"Date"]=paste(D12f[which(is.na(D12f$Date)),"yr"],D12f[which(is.na(D12f$Date)),"mon"],
"01",sep="-" )
D12f$Date=as.Date(D12f$Date)
D12f[which(is.na(D12f$Station)),"Station"]="D12"
D12f[which(is.na(D12f$Longitude)),"Longitude"]=D12f$Longitude[1]
D12f[which(is.na(D12f$Latitude)),"Latitude"]=D12f$Latitude[1]
D12f[which(is.na(D12f$date_dec)),"date_dec"]=as.numeric(dec_time(D12f[which(is.na(D12f$date_dec)),"Date"])$dec_time)
D12f[which(is.na(D12f$doy)),"doy"]=as.numeric(strftime(D12f[which(is.na(D12f$doy)),"Date"], format = "%j"))
D22f[which(is.na(D22f$Date)),"Date"]=paste(D22f[which(is.na(D22f$Date)),"yr"],D22f[which(is.na(D22f$Date)),"mon"],
"01",sep="-" )
D22f$Date=as.Date(D22f$Date)
D22f[which(is.na(D22f$Station)),"Station"]="D22"
D22f[which(is.na(D22f$Longitude)),"Longitude"]=D22f$Longitude[1]
D22f[which(is.na(D22f$Latitude)),"Latitude"]=D22f$Latitude[1]
D22f[which(is.na(D22f$date_dec)),"date_dec"]=as.numeric(dec_time(D22f[which(is.na(D22f$date_dec)),"Date"])$dec_time)
D22f[which(is.na(D22f$doy)),"doy"]=as.numeric(strftime(D22f[which(is.na(D22f$doy)),"Date"], format = "%j"))
D26f[which(is.na(D26f$Date)),"Date"]=paste(D26f[which(is.na(D26f$Date)),"yr"],D26f[which(is.na(D26f$Date)),"mon"],
"01",sep="-" )
D26f$Date=as.Date(D26f$Date)
D26f[which(is.na(D26f$Station)),"Station"]="D26"
D26f[which(is.na(D26f$Longitude)),"Longitude"]=D26f$Longitude[1]
D26f[which(is.na(D26f$Latitude)),"Latitude"]=D26f$Latitude[1]
D26f[which(is.na(D26f$date_dec)),"date_dec"]=as.numeric(dec_time(D26f[which(is.na(D26f$date_dec)),"Date"])$dec_time)
D26f[which(is.na(D26f$doy)),"doy"]=as.numeric(strftime(D26f[which(is.na(D26f$doy)),"Date"], format = "%j"))
D4f[which(is.na(D4f$Date)),"Date"]=paste(D4f[which(is.na(D4f$Date)),"yr"],D4f[which(is.na(D4f$Date)),"mon"],
"01",sep="-" )
D4f$Date=as.Date(D4f$Date)
D4f[which(is.na(D4f$Station)),"Station"]="D4"
D4f[which(is.na(D4f$Longitude)),"Longitude"]=D4f$Longitude[1]
D4f[which(is.na(D4f$Latitude)),"Latitude"]=D4f$Latitude[1]
D4f[which(is.na(D4f$date_dec)),"date_dec"]=as.numeric(dec_time(D4f[which(is.na(D4f$date_dec)),"Date"])$dec_time)
D4f[which(is.na(D4f$doy)),"doy"]=as.numeric(strftime(D4f[which(is.na(D4f$doy)),"Date"], format = "%j"))
### now impute actual variables
#D10f[which(is.na(D10f$chl)),"chl"]
toImputeChl=which(is.na(D10f$chl))
toImputeDo=which(is.na(D10f$do))
toImputePheo=which(is.na(D10f$pheo))
toImputeSal=which(is.na(D10f$sal))
toImputeTemp=which(is.na(D10f$temp))
for(i in 1:length(toImputeChl)){
D10f$chl[toImputeChl[i]]=mean(c(D10f$chl[toImputeChl[i]-1],D10f$chl[toImputeChl[i]+1]),na.rm=T)
}
sum(is.na(D10f$chl))
## [1] 0
for(i in 1:length(toImputeDo)){
D10f$do[toImputeDo[i]]=mean(c(D10f$do[toImputeDo[i]-1],D10f$do[toImputeDo[i]+1]),na.rm=T)
}
sum(is.na(D10f$do))
## [1] 0
for(i in 1:length(toImputePheo)){
D10f$pheo[toImputePheo[i]]=mean(c(D10f$pheo[toImputePheo[i]-1],D10f$pheo[toImputePheo[i]+1]),na.rm=T)
}
sum(is.na(D10f$pheo))
## [1] 0
for(i in 1:length(toImputeSal)){
D10f$sal[toImputeSal[i]]=mean(c(D10f$sal[toImputeSal[i]-1],D10f$sal[toImputeSal[i]+1]),na.rm=T)
}
sum(is.na(D10f$sal))
## [1] 0
for(i in 1:length(toImputeTemp)){
D10f$temp[toImputeTemp[i]]=mean(c(D10f$temp[toImputeTemp[i]-1],D10f$temp[toImputeTemp[i]+1]),na.rm=T)
}
sum(is.na(D10f$temp))
## [1] 0
###
toImputeChl=which(is.na(D12f$chl))
toImputeDo=which(is.na(D12f$do))
toImputePheo=which(is.na(D12f$pheo))
toImputeSal=which(is.na(D12f$sal))
toImputeTemp=which(is.na(D12f$temp))
for(i in 1:length(toImputeChl)){
D12f$chl[toImputeChl[i]]=mean(c(D12f$chl[toImputeChl[i]-1],D12f$chl[toImputeChl[i]+1]),na.rm=T)
}
sum(is.na(D12f$chl))
## [1] 0
for(i in 1:length(toImputeDo)){
D12f$do[toImputeDo[i]]=mean(c(D12f$do[toImputeDo[i]-1],D12f$do[toImputeDo[i]+1]),na.rm=T)
}
sum(is.na(D12f$do))
## [1] 0
for(i in 1:length(toImputePheo)){
D12f$pheo[toImputePheo[i]]=mean(c(D12f$pheo[toImputePheo[i]-1],D12f$pheo[toImputePheo[i]+1]),na.rm=T)
}
sum(is.na(D12f$pheo))
## [1] 0
for(i in 1:length(toImputeSal)){
D12f$sal[toImputeSal[i]]=mean(c(D12f$sal[toImputeSal[i]-1],D12f$sal[toImputeSal[i]+1]),na.rm=T)
}
sum(is.na(D12f$sal))
## [1] 0
for(i in 1:length(toImputeTemp)){
D12f$temp[toImputeTemp[i]]=mean(c(D12f$temp[toImputeTemp[i]-1],D12f$temp[toImputeTemp[i]+1]),na.rm=T)
}
sum(is.na(D12f$temp))
## [1] 0
###
toImputeChl=which(is.na(D22f$chl))
toImputeDo=which(is.na(D22f$do))
toImputePheo=which(is.na(D22f$pheo))
toImputeSal=which(is.na(D22f$sal))
toImputeTemp=which(is.na(D22f$temp))
for(i in 1:length(toImputeChl)){
D22f$chl[toImputeChl[i]]=mean(c(D22f$chl[toImputeChl[i]-1],D22f$chl[toImputeChl[i]+1]),na.rm=T)
}
sum(is.na(D22f$chl))
## [1] 0
for(i in 1:length(toImputeDo)){
D22f$do[toImputeDo[i]]=mean(c(D22f$do[toImputeDo[i]-1],D22f$do[toImputeDo[i]+1]),na.rm=T)
}
sum(is.na(D22f$do))
## [1] 1
for(i in 1:length(toImputePheo)){
D22f$pheo[toImputePheo[i]]=mean(c(D22f$pheo[toImputePheo[i]-1],D22f$pheo[toImputePheo[i]+1]),na.rm=T)
}
sum(is.na(D22f$pheo))
## [1] 0
for(i in 1:length(toImputeSal)){
D22f$sal[toImputeSal[i]]=mean(c(D22f$sal[toImputeSal[i]-1],D22f$sal[toImputeSal[i]+1]),na.rm=T)
}
sum(is.na(D22f$sal))
## [1] 0
for(i in 1:length(toImputeTemp)){
D22f$temp[toImputeTemp[i]]=mean(c(D22f$temp[toImputeTemp[i]-1],D22f$temp[toImputeTemp[i]+1]),na.rm=T)
}
sum(is.na(D22f$temp))
## [1] 0
###
toImputeChl=which(is.na(D26f$chl))
toImputeDo=which(is.na(D26f$do))
toImputePheo=which(is.na(D26f$pheo))
toImputeSal=which(is.na(D26f$sal))
toImputeTemp=which(is.na(D26f$temp))
for(i in 1:length(toImputeChl)){
D26f$chl[toImputeChl[i]]=mean(c(D26f$chl[toImputeChl[i]-1],D26f$chl[toImputeChl[i]+1]),na.rm=T)
}
sum(is.na(D26f$chl))
## [1] 0
for(i in 1:length(toImputeDo)){
D26f$do[toImputeDo[i]]=mean(c(D26f$do[toImputeDo[i]-1],D26f$do[toImputeDo[i]+1]),na.rm=T)
}
sum(is.na(D26f$do))
## [1] 0
for(i in 1:length(toImputePheo)){
D26f$pheo[toImputePheo[i]]=mean(c(D26f$pheo[toImputePheo[i]-1],D26f$pheo[toImputePheo[i]+1]),na.rm=T)
}
sum(is.na(D26f$pheo))
## [1] 0
for(i in 1:length(toImputeSal)){
D26f$sal[toImputeSal[i]]=mean(c(D26f$sal[toImputeSal[i]-1],D26f$sal[toImputeSal[i]+1]),na.rm=T)
}
sum(is.na(D26f$sal))
## [1] 0
for(i in 1:length(toImputeTemp)){
D26f$temp[toImputeTemp[i]]=mean(c(D26f$temp[toImputeTemp[i]-1],D26f$temp[toImputeTemp[i]+1]),na.rm=T)
}
sum(is.na(D26f$temp))
## [1] 0
###
toImputeChl=which(is.na(D4f$chl))
toImputeDo=which(is.na(D4f$do))
toImputePheo=which(is.na(D4f$pheo))
toImputeSal=which(is.na(D4f$sal))
toImputeTemp=which(is.na(D4f$temp))
for(i in 1:length(toImputeChl)){
D4f$chl[toImputeChl[i]]=mean(c(D4f$chl[toImputeChl[i]-1],D4f$chl[toImputeChl[i]+1]),na.rm=T)
}
sum(is.na(D4f$chl))
## [1] 0
for(i in 1:length(toImputeDo)){
D4f$do[toImputeDo[i]]=mean(c(D4f$do[toImputeDo[i]-1],D4f$do[toImputeDo[i]+1]),na.rm=T)
}
sum(is.na(D4f$do))
## [1] 0
for(i in 1:length(toImputePheo)){
D4f$pheo[toImputePheo[i]]=mean(c(D4f$pheo[toImputePheo[i]-1],D4f$pheo[toImputePheo[i]+1]),na.rm=T)
}
sum(is.na(D4f$pheo))
## [1] 0
for(i in 1:length(toImputeSal)){
D4f$sal[toImputeSal[i]]=mean(c(D4f$sal[toImputeSal[i]-1],D4f$sal[toImputeSal[i]+1]),na.rm=T)
}
sum(is.na(D4f$sal))
## [1] 0
for(i in 1:length(toImputeTemp)){
D4f$temp[toImputeTemp[i]]=mean(c(D4f$temp[toImputeTemp[i]-1],D4f$temp[toImputeTemp[i]+1]),na.rm=T)
}
sum(is.na(D4f$temp))
## [1] 0
###
tail(D10f,20)
## yr mon Date Station Longitude Latitude chl do pheo sal
## 173 2014 5 2014-05-01 D10 -121.9183 38.04631 1.24 10.000 1.46 13.50
## 174 2014 6 2014-06-01 D10 -121.9183 38.04631 1.24 10.000 1.46 13.50
## 175 2014 7 2014-07-01 D10 -121.9183 38.04631 1.24 10.000 1.46 13.50
## 176 2014 8 2014-08-01 D10 -121.9183 38.04631 1.24 10.000 1.46 13.50
## 177 2014 9 2014-09-01 D10 -121.9183 38.04631 1.24 10.000 1.46 13.50
## 178 2014 10 2014-10-01 D10 -121.9183 38.04631 1.24 10.000 1.46 13.50
## 179 2014 11 2014-11-01 D10 -121.9183 38.04631 1.24 10.000 1.46 13.50
## 180 2014 12 2014-12-01 D10 -121.9183 38.04631 0.80 9.700 1.63 13.50
## 181 2015 1 2015-01-16 D10 -121.9183 38.04631 0.36 9.400 1.80 13.50
## 182 2015 2 2015-02-13 D10 -121.9183 38.04631 1.83 9.175 1.06 6.75
## 183 2015 3 2015-03-13 D10 -121.9183 38.04631 4.41 8.950 0.74 0.00
## 184 2015 4 2015-04-13 D10 -121.9183 38.04631 1.50 8.550 1.09 7.70
## 185 2015 5 2015-05-13 D10 -121.9183 38.04631 2.04 8.400 1.56 10.20
## 186 2015 6 2015-06-10 D10 -121.9183 38.04631 3.67 7.850 1.10 9.40
## 187 2015 7 2015-07-01 D10 -121.9183 38.04631 3.67 7.850 1.10 9.40
## 188 2015 8 2015-08-01 D10 -121.9183 38.04631 3.67 7.850 1.10 9.40
## 189 2015 9 2015-09-01 D10 -121.9183 38.04631 3.67 7.850 1.10 9.40
## 190 2015 10 2015-10-01 D10 -121.9183 38.04631 3.67 7.850 1.10 9.40
## 191 2015 11 2015-11-01 D10 -121.9183 38.04631 3.67 7.850 1.10 9.40
## 192 2015 12 2015-12-01 D10 -121.9183 38.04631 3.67 7.850 1.10 9.40
## temp date_dec doy
## 173 8.96 2014.332 121
## 174 8.96 2014.416 152
## 175 8.96 2014.499 182
## 176 8.96 2014.584 213
## 177 8.96 2014.668 244
## 178 8.96 2014.751 274
## 179 8.96 2014.836 305
## 180 8.96 2014.918 335
## 181 11.38 2015.047 16
## 182 13.80 2015.123 44
## 183 15.80 2015.200 72
## 184 17.16 2015.285 103
## 185 17.50 2015.367 133
## 186 20.55 2015.444 161
## 187 20.55 2015.499 182
## 188 20.55 2015.584 213
## 189 20.55 2015.668 244
## 190 20.55 2015.751 274
## 191 20.55 2015.836 305
## 192 20.55 2015.918 335
## need to trim july on in 2015
D10f=D10f[1:which(D10f$yr=="2015" & D10f$mon=="6"),]
D12f=D12f[1:which(D12f$yr=="2015" & D12f$mon=="6"),]
D22f=D22f[1:which(D22f$yr=="2015" & D22f$mon=="6"),]
D26f=D26f[1:which(D26f$yr=="2015" & D26f$mon=="6"),]
D4f=D4f[1:which(D4f$yr=="2015" & D4f$mon=="6"),]
D22f$Longitude[1]=D22f$Longitude[2]
D22f$Latitude[1]=D22f$Latitude[2]
D22f$do[1]=D22f$do[2]
setwd("~/UC_Berkeley/Semester_4/timeSeries")
afterImputation=rbind(D10f,D12f,D22f,D26f,D4f)
write.csv(afterImputation,"sfeiDataForProject.csv",row.names=F)
setwd("~/UC_Berkeley/Semester_4/timeSeries")
sfeiData<-read.csv("sfeiDataForProject.csv",stringsAsFactors=FALSE)
names(sfeiData)
## [1] "yr" "mon" "Date" "Station" "Longitude"
## [6] "Latitude" "chl" "do" "pheo" "sal"
## [11] "temp" "date_dec" "doy"
## "chl" "do" "pheo" "sal" "temp"
length(unique(sfeiData$Station)) ## 5
## [1] 5
unique(sfeiData$Station) ## "D10" "D12" "D22" "D26" "D4"
## [1] "D10" "D12" "D22" "D26" "D4"
D10<-subset(sfeiData,Station=="D10")
D12<-subset(sfeiData,Station=="D12")
D22<-subset(sfeiData,Station=="D22")
D26<-subset(sfeiData,Station=="D26")
D4<-subset(sfeiData,Station=="D4")
plot(D10$chl,type="l")
require(mgcv)
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-15. For overview type 'help("mgcv-package")'.
require(parallel)
## Loading required package: parallel
nc <- 4 ## have up to 8
if (detectCores()>1) { ## no point otherwise
cl <- makeCluster(nc)
} else cl <- NULL
#### chl ####
sfeiData$Station=as.factor(sfeiData$Station)
chlSeasonalTimeTrend=bam(chl~Station+s(date_dec,bs="cs",by=Station,k=25)+s(doy,bs="cs",by=Station,k=25),data=sfeiData,cluster=cl)
summary(chlSeasonalTimeTrend)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## chl ~ Station + s(date_dec, bs = "cs", by = Station, k = 25) +
## s(doy, bs = "cs", by = Station, k = 25)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.25077 0.21501 10.468 <2e-16 ***
## StationD12 0.15245 0.30404 0.501 0.6162
## StationD22 0.62694 0.30405 2.062 0.0395 *
## StationD26 -0.10304 0.30409 -0.339 0.7348
## StationD4 0.07352 0.30405 0.242 0.8090
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(date_dec):StationD10 6.399e-06 24 0.000 0.749071
## s(date_dec):StationD12 5.068e-05 24 0.000 0.427507
## s(date_dec):StationD22 1.174e+00 24 0.131 0.065529 .
## s(date_dec):StationD26 2.027e+00 24 0.316 0.012899 *
## s(date_dec):StationD4 1.191e-05 24 0.000 0.475189
## s(doy):StationD10 3.499e+00 24 0.982 1.47e-05 ***
## s(doy):StationD12 2.613e+00 24 0.574 0.000860 ***
## s(doy):StationD22 3.483e+00 24 0.606 0.001719 **
## s(doy):StationD26 2.730e+00 24 0.543 0.001524 **
## s(doy):StationD4 2.929e+00 24 0.712 0.000218 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0938 Deviance explained = 11.6%
## fREML = 2342.9 Scale est. = 8.596 n = 930
gam.check(chlSeasonalTimeTrend)
##
## Method: fREML Optimizer: perf newton
## full convergence after 16 iterations.
## Gradient range [-7.344914e-06,1.798371e-05]
## (score 2342.87 & scale 8.595965).
## Hessian positive definite, eigenvalue range [2.343281e-06,462.5285].
## Model rank = 245 / 245
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(date_dec):StationD10 2.40e+01 6.40e-06 8.32e-01 0.00
## s(date_dec):StationD12 2.40e+01 5.07e-05 8.32e-01 0.02
## s(date_dec):StationD22 2.40e+01 1.17e+00 8.32e-01 0.00
## s(date_dec):StationD26 2.40e+01 2.03e+00 8.32e-01 0.00
## s(date_dec):StationD4 2.40e+01 1.19e-05 8.32e-01 0.00
## s(doy):StationD10 2.40e+01 3.50e+00 9.77e-01 0.10
## s(doy):StationD12 2.40e+01 2.61e+00 9.77e-01 0.12
## s(doy):StationD22 2.40e+01 3.48e+00 9.77e-01 0.19
## s(doy):StationD26 2.40e+01 2.73e+00 9.77e-01 0.14
## s(doy):StationD4 2.40e+01 2.93e+00 9.77e-01 0.11
plot(chlSeasonalTimeTrend)
## datedec flat for D10, D12, D4
## increasing for D22
## curved updward for D26
## doy peak in 100s for D10, D12, D4
## plateau 100 to 250 for D22, D26
sfeiData$predGAMchl=chlSeasonalTimeTrend$fitted
sfeiData$residGAMchl=chlSeasonalTimeTrend$resid
D10<-subset(sfeiData,Station=="D10")
D12<-subset(sfeiData,Station=="D12")
D22<-subset(sfeiData,Station=="D22")
D26<-subset(sfeiData,Station=="D26")
D4<-subset(sfeiData,Station=="D4")
plot(D10$chl,type="l")
lines(D10$residGAMchl,col="red")
lines(D10$predGAMchl,col="blue")
par(mfrow=c(2,1))
plot(D10$residGAMchl,type="l") ## check for stationarity
plot(log(D10$residGAMchl+abs(min(D10$residGAMchl))),type="l")
plot(D12$residGAMchl,type="l")
plot(log(D12$residGAMchl+abs(min(D12$residGAMchl))),type="l")
plot(D22$residGAMchl,type="l")
plot(log(D22$residGAMchl+abs(min(D22$residGAMchl))),type="l")
plot(D26$residGAMchl,type="l")
plot(log(D26$residGAMchl+abs(min(D26$residGAMchl))),type="l")
plot(D4$residGAMchl,type="l")
plot(log(D4$residGAMchl+abs(min(D4$residGAMchl))),type="l")
D10$residGAMchlTransform=log(D10$residGAMchl+abs(min(D10$residGAMchl))+1)
D12$residGAMchlTransform=log(D12$residGAMchl+abs(min(D12$residGAMchl))+1)
D22$residGAMchlTransform=log(D22$residGAMchl+abs(min(D22$residGAMchl))+1)
D26$residGAMchlTransform=log(D26$residGAMchl+abs(min(D26$residGAMchl))+1)
D4$residGAMchlTransform=log(D4$residGAMchl+abs(min(D4$residGAMchl))+1)
sfeiData=rbind(D10,D12,D22,D26,D4)
doSeasonalTimeTrend=bam(do~Station+s(date_dec,bs="cs",by=Station,k=25)+s(doy,bs="cs",by=Station,k=25),data=sfeiData,cluster=cl)
summary(doSeasonalTimeTrend)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## do ~ Station + s(date_dec, bs = "cs", by = Station, k = 25) +
## s(doy, bs = "cs", by = Station, k = 25)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.048376 0.036353 248.906 < 2e-16 ***
## StationD12 -0.071472 0.051404 -1.390 0.165
## StationD22 0.019413 0.051405 0.378 0.706
## StationD26 -0.225266 0.051433 -4.380 1.34e-05 ***
## StationD4 -0.003852 0.051406 -0.075 0.940
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(date_dec):StationD10 13.245 24 2.253 1.58e-07 ***
## s(date_dec):StationD12 12.734 24 2.263 8.92e-08 ***
## s(date_dec):StationD22 14.289 24 2.911 4.26e-10 ***
## s(date_dec):StationD26 14.006 24 2.700 3.00e-09 ***
## s(date_dec):StationD4 12.954 24 1.888 5.27e-06 ***
## s(doy):StationD10 5.941 24 10.532 < 2e-16 ***
## s(doy):StationD12 5.417 24 11.220 < 2e-16 ***
## s(doy):StationD22 5.491 24 10.708 < 2e-16 ***
## s(doy):StationD26 6.466 24 14.966 < 2e-16 ***
## s(doy):StationD4 5.808 24 11.506 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.651 Deviance explained = 68.8%
## fREML = 852.35 Scale est. = 0.24566 n = 930
gam.check(doSeasonalTimeTrend)
##
## Method: fREML Optimizer: perf newton
## full convergence after 6 iterations.
## Gradient range [-5.473219e-06,8.978749e-06]
## (score 852.3495 & scale 0.2456584).
## Hessian positive definite, eigenvalue range [1.511284,463.0843].
## Model rank = 245 / 245
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(date_dec):StationD10 24.000 13.245 0.349 0
## s(date_dec):StationD12 24.000 12.734 0.349 0
## s(date_dec):StationD22 24.000 14.289 0.349 0
## s(date_dec):StationD26 24.000 14.006 0.349 0
## s(date_dec):StationD4 24.000 12.954 0.349 0
## s(doy):StationD10 24.000 5.941 0.811 0
## s(doy):StationD12 24.000 5.417 0.811 0
## s(doy):StationD22 24.000 5.491 0.811 0
## s(doy):StationD26 24.000 6.466 0.811 0
## s(doy):StationD4 24.000 5.808 0.811 0
plot(doSeasonalTimeTrend)
## datedec slight concave D10, D12, D22 D4
## flatter D26
## doy peak 50, min peak 200 all
sfeiData$predGAMdo=doSeasonalTimeTrend$fitted
sfeiData$residGAMdo=doSeasonalTimeTrend$resid
D10<-subset(sfeiData,Station=="D10")
D12<-subset(sfeiData,Station=="D12")
D22<-subset(sfeiData,Station=="D22")
D26<-subset(sfeiData,Station=="D26")
D4<-subset(sfeiData,Station=="D4")
plot(D10$do,type="l")
lines(D10$residGAMdo,col="red")
lines(D10$predGAMdo,col="blue")
## these don't seem to help stick with original
par(mfrow=c(2,1))
plot(D10$residGAMdo,type="l") ## check for stationarity
plot(log(D10$residGAMdo+abs(min(D10$residGAMdo))),type="l")
##http://stackoverflow.com/questions/26617587/finding-optimal-lambda-for-box-cox-transform-in-r
require(MASS)
## Loading required package: MASS
out <- boxcox(lm(D10$residGAMdo+abs(min(D10$residGAMdo))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## one is consistent, keep as is
## [1] 0.7474747 1.7979798
plot(D12$residGAMdo,type="l")
plot(log(D12$residGAMdo+abs(min(D12$residGAMdo))),type="l")
out <- boxcox(lm(D12$residGAMdo+abs(min(D12$residGAMdo))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## one is consistent, keep as is
## [1] 0.3838384 1.3535354
plot(D22$residGAMdo,type="l")
plot(log(D22$residGAMdo+abs(min(D22$residGAMdo))),type="l")
out <- boxcox(lm(D22$residGAMdo+abs(min(D22$residGAMdo))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## one is consistent, keep as is
## [1] 0.6262626 1.5151515
plot(D26$residGAMdo,type="l")
plot(log(D26$residGAMdo+abs(min(D26$residGAMdo))),type="l")
out <- boxcox(lm(D26$residGAMdo+abs(min(D26$residGAMdo))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## one is consistent, keep as is
## [1] 0.6262626 1.7171717
plot(D4$residGAMdo,type="l")
plot(log(D4$residGAMdo+abs(min(D4$residGAMdo))),type="l")
out <- boxcox(lm(D4$residGAMdo+abs(min(D4$residGAMdo))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ##
## [1] 0.9090909 1.9595960
require(forecast)
## Loading required package: forecast
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: timeDate
## This is forecast 7.3
##
## Attaching package: 'forecast'
## The following object is masked from 'package:nlme':
##
## getResponse
plot(D4$residGAMdo,type="l")
trans.vector = BoxCox( D4$residGAMdo+abs(min(D4$residGAMdo))+1, 1.5)
plot(trans.vector,type="l") ## basically the same
#### pheo ####
pheoSeasonalTimeTrend=bam(pheo~Station+s(date_dec,bs="cs",by=Station,k=25)+s(doy,bs="cs",by=Station,k=25),data=sfeiData,cluster=cl)
summary(pheoSeasonalTimeTrend)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## pheo ~ Station + s(date_dec, bs = "cs", by = Station, k = 25) +
## s(doy, bs = "cs", by = Station, k = 25)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.29127 0.05781 22.338 <2e-16 ***
## StationD12 -0.04743 0.08174 -0.580 0.5619
## StationD22 -0.02635 0.08174 -0.322 0.7473
## StationD26 -0.26559 0.08175 -3.249 0.0012 **
## StationD4 -0.05826 0.08174 -0.713 0.4762
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(date_dec):StationD10 3.944e-06 24 0.000 0.9462
## s(date_dec):StationD12 4.283e-06 24 0.000 0.7990
## s(date_dec):StationD22 1.537e+00 24 0.256 0.0142 *
## s(date_dec):StationD26 1.512e+00 24 0.131 0.1115
## s(date_dec):StationD4 3.518e-06 24 0.000 0.7584
## s(doy):StationD10 4.253e+00 24 1.091 1e-05 ***
## s(doy):StationD12 1.886e+00 24 0.341 0.0071 **
## s(doy):StationD22 9.002e-05 24 0.000 0.3385
## s(doy):StationD26 2.296e+00 24 0.345 0.0124 *
## s(doy):StationD4 2.191e+00 24 0.291 0.0233 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0687 Deviance explained = 8.64%
## fREML = 1121 Scale est. = 0.62131 n = 930
gam.check(pheoSeasonalTimeTrend)
##
## Method: fREML Optimizer: perf newton
## full convergence after 18 iterations.
## Gradient range [-1.809772e-06,4.248297e-06]
## (score 1120.987 & scale 0.6213097).
## Hessian positive definite, eigenvalue range [1.349349e-06,462.5197].
## Model rank = 245 / 245
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(date_dec):StationD10 2.40e+01 3.94e-06 7.86e-01 0.00
## s(date_dec):StationD12 2.40e+01 4.28e-06 7.86e-01 0.00
## s(date_dec):StationD22 2.40e+01 1.54e+00 7.86e-01 0.00
## s(date_dec):StationD26 2.40e+01 1.51e+00 7.86e-01 0.00
## s(date_dec):StationD4 2.40e+01 3.52e-06 7.86e-01 0.00
## s(doy):StationD10 2.40e+01 4.25e+00 9.39e-01 0.05
## s(doy):StationD12 2.40e+01 1.89e+00 9.39e-01 0.04
## s(doy):StationD22 2.40e+01 9.00e-05 9.39e-01 0.04
## s(doy):StationD26 2.40e+01 2.30e+00 9.39e-01 0.04
## s(doy):StationD4 2.40e+01 2.19e+00 9.39e-01 0.06
plot(pheoSeasonalTimeTrend)
## date dec flat D10, D12, D4
## downward trend D22
## down and up D26
## doy, peak 125, low point 275 D10
## concave slight peak around 150 D12, D26, D4 (even slighter for D22)
sfeiData$predGAMpheo=pheoSeasonalTimeTrend$fitted
sfeiData$residGAMpheo=pheoSeasonalTimeTrend$resid
D10<-subset(sfeiData,Station=="D10")
D12<-subset(sfeiData,Station=="D12")
D22<-subset(sfeiData,Station=="D22")
D26<-subset(sfeiData,Station=="D26")
D4<-subset(sfeiData,Station=="D4")
plot(D10$pheo,type="l")
lines(D10$residGAMpheo,col="red")
lines(D10$predGAMpheo,col="blue")
par(mfrow=c(2,1))
plot(D10$residGAMpheo,type="l") ## check for stationarity
plot(log(D10$residGAMpheo+abs(min(D10$residGAMpheo))),type="l")
plot(D12$residGAMpheo,type="l")
plot(log(D12$residGAMpheo+abs(min(D12$residGAMpheo))),type="l")
plot(D22$residGAMpheo,type="l")
plot(log(D22$residGAMpheo+abs(min(D22$residGAMpheo))),type="l")
plot(D26$residGAMpheo,type="l")
plot(log(D26$residGAMpheo+abs(min(D26$residGAMpheo))),type="l")
plot(D4$residGAMpheo,type="l")
plot(log(D4$residGAMpheo+abs(min(D4$residGAMpheo))),type="l")
D10$residGAMpheoTransform=log(D10$residGAMpheo+abs(min(D10$residGAMpheo))+1)
D12$residGAMpheoTransform=log(D12$residGAMpheo+abs(min(D12$residGAMpheo))+1)
D22$residGAMpheoTransform=log(D22$residGAMpheo+abs(min(D22$residGAMpheo))+1)
D26$residGAMpheoTransform=log(D26$residGAMpheo+abs(min(D26$residGAMpheo))+1)
D4$residGAMpheoTransform=log(D4$residGAMpheo+abs(min(D4$residGAMpheo))+1)
sfeiData=rbind(D10,D12,D22,D26,D4)
salSeasonalTimeTrend=bam(sal~Station+s(date_dec,bs="cs",by=Station,k=25)+s(doy,bs="cs",by=Station,k=25),data=sfeiData,cluster=cl)
summary(salSeasonalTimeTrend)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## sal ~ Station + s(date_dec, bs = "cs", by = Station, k = 25) +
## s(doy, bs = "cs", by = Station, k = 25)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.1895 0.1332 38.97 <2e-16 ***
## StationD12 -3.4021 0.1883 -18.07 <2e-16 ***
## StationD22 -4.0470 0.1883 -21.49 <2e-16 ***
## StationD26 -5.0225 0.1883 -26.68 <2e-16 ***
## StationD4 -2.2796 0.1883 -12.11 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(date_dec):StationD10 1.716e+01 24 15.183 < 2e-16 ***
## s(date_dec):StationD12 5.482e+00 24 2.185 6.91e-11 ***
## s(date_dec):StationD22 4.847e+00 24 2.100 7.97e-11 ***
## s(date_dec):StationD26 1.084e-05 24 0.000 1
## s(date_dec):StationD4 1.265e+01 24 9.474 < 2e-16 ***
## s(doy):StationD10 6.852e+00 24 16.734 < 2e-16 ***
## s(doy):StationD12 3.236e+00 24 2.203 1.32e-12 ***
## s(doy):StationD22 2.834e+00 24 1.886 3.14e-11 ***
## s(doy):StationD26 1.054e-05 24 0.000 1
## s(doy):StationD4 4.802e+00 24 6.041 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.7 Deviance explained = 72%
## fREML = 1979.1 Scale est. = 3.2959 n = 930
gam.check(salSeasonalTimeTrend)
##
## Method: fREML Optimizer: perf newton
## full convergence after 15 iterations.
## Gradient range [-5.244617e-06,3.029862e-07]
## (score 1979.144 & scale 3.295859).
## Hessian positive definite, eigenvalue range [5.143902e-06,462.824].
## Model rank = 245 / 245
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(date_dec):StationD10 2.40e+01 1.72e+01 6.27e-01 0
## s(date_dec):StationD12 2.40e+01 5.48e+00 6.27e-01 0
## s(date_dec):StationD22 2.40e+01 4.85e+00 6.27e-01 0
## s(date_dec):StationD26 2.40e+01 1.08e-05 6.27e-01 0
## s(date_dec):StationD4 2.40e+01 1.26e+01 6.27e-01 0
## s(doy):StationD10 2.40e+01 6.85e+00 8.48e-01 0
## s(doy):StationD12 2.40e+01 3.24e+00 8.48e-01 0
## s(doy):StationD22 2.40e+01 2.83e+00 8.48e-01 0
## s(doy):StationD26 2.40e+01 1.05e-05 8.48e-01 0
## s(doy):StationD4 2.40e+01 4.80e+00 8.48e-01 0
plot(salSeasonalTimeTrend)
## date dec D10 wiggly peak 2008, same wiggles but dampened D12, D4
## concave plateau D22
## flat D26
## doy min peak 75, peak 325 D10, D4 same but dampened D12, D22
## flat D26
sfeiData$predGAMsal=salSeasonalTimeTrend$fitted
sfeiData$residGAMsal=salSeasonalTimeTrend$resid
D10<-subset(sfeiData,Station=="D10")
D12<-subset(sfeiData,Station=="D12")
D22<-subset(sfeiData,Station=="D22")
D26<-subset(sfeiData,Station=="D26")
D4<-subset(sfeiData,Station=="D4")
plot(D10$sal,type="l")
lines(D10$residGAMsal,col="red")
lines(D10$predGAMsal,col="blue")
## these don't seem to help, stick to original
par(mfrow=c(2,1))
plot(D10$residGAMsal,type="l") ## check for stationarity
plot(log(D10$residGAMsal+abs(min(D10$residGAMsal))),type="l")
out <- boxcox(lm(D10$residGAMsal+abs(min(D10$residGAMsal))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## one is consistent, keep this the same
## [1] 0.7878788 1.3535354
plot(D12$residGAMsal,type="l")
plot(log(D12$residGAMsal+abs(min(D12$residGAMsal))),type="l")
out <- boxcox(lm(D12$residGAMsal+abs(min(D12$residGAMsal))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ##
## [1] 0.3030303 0.8282828
trans.vector = BoxCox( D12$residGAMsal+abs(min(D12$residGAMsal))+1, .5)
plot(D12$residGAMsal,type="l")
plot(trans.vector,type="l") ## looks the same
plot(D22$residGAMsal,type="l")
plot(log(D22$residGAMsal+abs(min(D22$residGAMsal))),type="l")
out <- boxcox(lm(D22$residGAMsal+abs(min(D22$residGAMsal))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## this suggest log is better, actually change
## [1] -0.1818182 0.1010101
plot(D26$residGAMsal,type="l")
plot(log(D26$residGAMsal+abs(min(D26$residGAMsal))),type="l")
out <- boxcox(lm(D26$residGAMsal+abs(min(D26$residGAMsal))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ##
## [1] -2.00000 -1.59596
trans.vector = BoxCox( D26$residGAMsal+abs(min(D26$residGAMsal))+1, -1.75)
plot(D26$residGAMsal,type="l")
plot(trans.vector,type="l") ## looks the same
plot(D4$residGAMsal,type="l")
plot(log(D4$residGAMsal+abs(min(D4$residGAMsal))),type="l")
out <- boxcox(lm(D4$residGAMsal+abs(min(D4$residGAMsal))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## 1 is consistent, keep
## [1] 0.6262626 1.1919192
D22$residGAMsalTransform=log(D22$residGAMsal+abs(min(D22$residGAMsal))+1)
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:nlme':
##
## collapse
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
sfeiData=bind_rows(D10,D12,D22,D26,D4)
tempSeasonalTimeTrend=bam(temp~Station+s(date_dec,bs="cs",by=Station,k=25)+s(doy,bs="cs",by=Station,k=25),data=sfeiData,cluster=cl)
summary(tempSeasonalTimeTrend)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## temp ~ Station + s(date_dec, bs = "cs", by = Station, k = 25) +
## s(doy, bs = "cs", by = Station, k = 25)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.9100 0.1061 150.017 <2e-16 ***
## StationD12 0.2163 0.1500 1.442 0.150
## StationD22 0.1298 0.1500 0.866 0.387
## StationD26 0.2958 0.1501 1.971 0.049 *
## StationD4 0.1274 0.1500 0.849 0.396
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(date_dec):StationD10 19.609 24 12.71 <2e-16 ***
## s(date_dec):StationD12 19.075 24 11.21 <2e-16 ***
## s(date_dec):StationD22 20.329 24 14.04 <2e-16 ***
## s(date_dec):StationD26 20.031 24 12.95 <2e-16 ***
## s(date_dec):StationD4 19.684 24 12.64 <2e-16 ***
## s(doy):StationD10 7.472 24 52.31 <2e-16 ***
## s(doy):StationD12 7.726 24 60.26 <2e-16 ***
## s(doy):StationD22 7.875 24 60.40 <2e-16 ***
## s(doy):StationD26 7.573 24 68.79 <2e-16 ***
## s(doy):StationD4 7.679 24 56.06 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.906 Deviance explained = 92%
## fREML = 1954.4 Scale est. = 2.0903 n = 930
gam.check(tempSeasonalTimeTrend)
##
## Method: fREML Optimizer: perf newton
## full convergence after 6 iterations.
## Gradient range [-7.594998e-06,6.200114e-06]
## (score 1954.41 & scale 2.090252).
## Hessian positive definite, eigenvalue range [2.564866,463.7284].
## Model rank = 245 / 245
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(date_dec):StationD10 24.000 19.609 0.184 0
## s(date_dec):StationD12 24.000 19.075 0.184 0
## s(date_dec):StationD22 24.000 20.329 0.184 0
## s(date_dec):StationD26 24.000 20.031 0.184 0
## s(date_dec):StationD4 24.000 19.684 0.184 0
## s(doy):StationD10 24.000 7.472 0.618 0
## s(doy):StationD12 24.000 7.726 0.618 0
## s(doy):StationD22 24.000 7.875 0.618 0
## s(doy):StationD26 24.000 7.573 0.618 0
## s(doy):StationD4 24.000 7.679 0.618 0
plot(tempSeasonalTimeTrend)
## datedec nearly flat D10, D12, D26, D4
## flat D22
## doy peak 200 all
sfeiData$predGAMtemp=tempSeasonalTimeTrend$fitted
sfeiData$residGAMtemp=tempSeasonalTimeTrend$resid
D10<-subset(sfeiData,Station=="D10")
D12<-subset(sfeiData,Station=="D12")
D22<-subset(sfeiData,Station=="D22")
D26<-subset(sfeiData,Station=="D26")
D4<-subset(sfeiData,Station=="D4")
plot(D10$temp,type="l")
lines(D10$residGAMtemp,col="red")
lines(D10$predGAMtemp,col="blue")
## doesn't really help, stick with original
par(mfrow=c(2,1))
plot(D10$residGAMtemp,type="l") ## check for stationarity
plot(log(D10$residGAMtemp+abs(min(D10$residGAMtemp))),type="l")
out <- boxcox(lm(D10$residGAMtemp+abs(min(D10$residGAMtemp))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## 1 is consistent, keep
## [1] 0.7070707 1.3131313
plot(D12$residGAMtemp,type="l")
plot(log(D12$residGAMtemp+abs(min(D12$residGAMtemp))),type="l")
out <- boxcox(lm(D12$residGAMtemp+abs(min(D12$residGAMtemp))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## 1 is consistent, keep
## [1] 0.8686869 1.5151515
plot(D22$residGAMtemp,type="l")
plot(log(D22$residGAMtemp+abs(min(D22$residGAMtemp))),type="l")
out <- boxcox(lm(D22$residGAMtemp+abs(min(D22$residGAMtemp))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## 1 is consistent, keep
## [1] 0.8686869 1.5151515
plot(D26$residGAMtemp,type="l")
plot(log(D26$residGAMtemp+abs(min(D26$residGAMtemp))),type="l")
out <- boxcox(lm(D26$residGAMtemp+abs(min(D26$residGAMtemp))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## 1 is consistent, keep
## [1] 0.9494949 1.6363636
plot(D4$residGAMtemp,type="l")
plot(log(D4$residGAMtemp+abs(min(D4$residGAMtemp))),type="l")
out <- boxcox(lm(D4$residGAMtemp+abs(min(D4$residGAMtemp))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## 1 is consistent, keep
## [1] 0.7878788 1.4343434
setwd("~/UC_Berkeley/Semester_4/timeSeries")
write.csv(D10,"D10data.csv",row.names=F)
write.csv(D12,"D12data.csv",row.names=F)
write.csv(D22,"D22data.csv",row.names=F)
write.csv(D26,"D26data.csv",row.names=F)
write.csv(D4,"D4data.csv",row.names=F)
acfTest=function(stationData,nutrient){
if(sum(grepl(paste(nutrient,"Transform",sep=""),names(stationData)))>0){
varName=paste("residGAM",nutrient,"Transform",sep="")
}else{
varName=paste("residGAM",nutrient,sep="")
}
empP<-c()
for(i in 2:nrow(stationData) ){
test1=stationData[c((i):nrow(stationData),1:(i-1)),varName]
test=acf(test1,lag.max=12,plot=F)
acfOfInterest=test$acf[2:13]
empP<-c( empP,acfOfInterest[which.max(abs(acfOfInterest))] )
# print(i)
}
test=acf(stationData[,varName],lag.max=12,plot=F)
acfOfInterest=test$acf[2:13]
lagOfInterest=test$lag[2:13]
maxLag=lagOfInterest[which.max(abs(acfOfInterest))]
if(acfOfInterest[which.max(abs(acfOfInterest))]<0){
pVal=length(which(empP<acfOfInterest[which.max(abs(acfOfInterest))]))/nrow(stationData)
}else{
pVal=length(which(empP>acfOfInterest[which.max(abs(acfOfInterest))]))/nrow(stationData)
}
minPval=1/nrow(stationData)
return(list(maxLag=maxLag,pVal=pVal,minPval=minPval))
}
acfTest(D10,"chl")
## $maxLag
## [1] 1
##
## $pVal
## [1] 0.8064516
##
## $minPval
## [1] 0.005376344
ccfTest=function(station1Data,station2Data,station1Nutrient,station2Nutrient){
if(sum(grepl(paste(station1Nutrient,"Transform",sep=""),names(station1Data)))>0){
varName=paste("residGAM",station1Nutrient,"Transform",sep="")
}else{
varName=paste("residGAM",station1Nutrient,sep="")
}
if(sum(grepl(paste(station2Nutrient,"Transform",sep=""),names(station2Data)))>0){
varName2=paste("residGAM",station2Nutrient,"Transform",sep="")
}else{
varName2=paste("residGAM",station2Nutrient,sep="")
}
empP<-c()
for(i in 2:nrow(station2Data) ){
test1=station2Data[c((i):nrow(station2Data),1:(i-1)),varName2]
test=ccf(station1Data[,varName],test1,lag.max=12,plot=F)
ccfOfInterest=test$acf[14:25]
empP<-c( empP,ccfOfInterest[which.max(abs(ccfOfInterest))] )
# print(i)
}
test=ccf(station1Data[,varName],station2Data[,varName2],lag.max=12,plot=F)
ccfOfInterest=test$acf[14:25]
lagOfInterest=test$lag[14:25]
maxLag=lagOfInterest[which.max(abs(ccfOfInterest))]
if(ccfOfInterest[which.max(abs(ccfOfInterest))]<0){
pVal=length(which(empP<ccfOfInterest[which.max(abs(ccfOfInterest))]))/nrow(station2Data)
}else{
pVal=length(which(empP>ccfOfInterest[which.max(abs(ccfOfInterest))]))/nrow(station2Data)
}
minPval=1/nrow(station2Data)
return(list(maxLag=maxLag,pVal=pVal,minPval=minPval))
}
ccfTest(D10,D12,"chl","chl")
## $maxLag
## [1] 12
##
## $pVal
## [1] 0.2150538
##
## $minPval
## [1] 0.005376344
stationNames<-c("D10","D12","D22","D26","D4")
varNames<-c("chl","do","pheo","sal","temp")
allCombo=expand.grid(stationNames,stationNames,varNames,varNames)
dim(allCombo)
head(allCombo)
names(allCombo)=c("station1","station2","var1","var2")
withinStation=allCombo[which(allCombo$station1==allCombo$station2),]
dim(withinStation) ## 125
withinStationSameVar=withinStation[which(withinStation$var1==withinStation$var2),]
dim(withinStationSameVar) ## 25
withinStationDiffVar=withinStation[which(withinStation$var1!=withinStation$var2),]
dim(withinStationDiffVar) ## 100
leftOver=allCombo[-which(allCombo$station1==allCombo$station2),]
acrossStationSameVar=leftOver[which(leftOver$var1==leftOver$var2),]
dim(acrossStationSameVar) ## 100
leftOver2=leftOver[-which(leftOver$var1==leftOver$var2),]
dim(leftOver2)
acrossStationDiffVar=leftOver2[which(leftOver2$var1!=leftOver2$var2),]
dim(acrossStationDiffVar) ## 400 same as leftOver2 as expected
setwd("~/UC_Berkeley/Semester_4/timeSeries")
D10<-read.csv("D10data.csv",stringsAsFactors=F)
D12<-read.csv("D12data.csv",stringsAsFactors=F)
D22<-read.csv("D22data.csv",stringsAsFactors=F)
D26<-read.csv("D26data.csv",stringsAsFactors=F)
D4<-read.csv("D4data.csv",stringsAsFactors=F)
storeData=vector("list",length(stationNames))
storeData[[1]]=D10
storeData[[2]]=D12
storeData[[3]]=D22
storeData[[4]]=D26
storeData[[5]]=D4
names(storeData)=stationNames
setwd("~/UC_Berkeley/Semester_4/timeSeries")
acrossStationSameVarResults<-c()
for(i in 1:nrow(acrossStationSameVar)){
res= ccfTest(storeData[[acrossStationSameVar$station1[i]]],storeData[[acrossStationSameVar$station2[i]]],
as.character(acrossStationSameVar$var1[i]),as.character(acrossStationSameVar$var2[i]))
acrossStationSameVarResults<-rbind(acrossStationSameVarResults,c(res$maxLag,res$pVal,res$minPval))
print(i)
}
write.csv(acrossStationSameVarResults,"acrossStationSameVarResults.csv",row.names=F)
acrossStationDiffVarResults<-c()
for(i in 1:nrow(acrossStationDiffVar)){
res= ccfTest(storeData[[acrossStationDiffVar$station1[i]]],storeData[[acrossStationDiffVar$station2[i]]],
as.character(acrossStationDiffVar$var1[i]),as.character(acrossStationDiffVar$var2[i]))
acrossStationDiffVarResults<-rbind(acrossStationDiffVarResults,c(res$maxLag,res$pVal,res$minPval))
print(i)
}
write.csv(acrossStationDiffVarResults,"acrossStationDiffVarResults.csv",row.names=F)
withinStationDiffVarResults<-c()
for(i in 1:nrow(withinStationDiffVar)){
res=ccfTest(storeData[[withinStationDiffVar$station1[i]]],storeData[[withinStationDiffVar$station2[i]]],
as.character(withinStationDiffVar$var1[i]),as.character(withinStationDiffVar$var2[i]))
withinStationDiffVarResults<-rbind(withinStationDiffVarResults,c(res$maxLag,res$pVal,res$minPval))
print(i)
}
write.csv(withinStationDiffVarResults,"withinStationDiffVarResults.csv",row.names=F)
## don't actually use this, not one of the research questions
withinStationSameVarResults<-c()
for(i in 1:nrow(withinStationSameVar)){
res=acfTest(storeData[[withinStationSameVar$station1[i]]],as.character(withinStationSameVar$var1[i]))
withinStationSameVarResults<-rbind(withinStationSameVarResults,c(res$maxLag,res$pVal,res$minPval))
print(i)
}
write.csv(withinStationSameVarResults,"withinStationSameVarResults.csv",row.names=F)
require(psd)
## Loading required package: psd
## Loaded psd (1.0.1) -- Adaptive multitaper spectrum estimation
ccfTestFreq=function(station1Data,station2Data,station1Nutrient,station2Nutrient){
if(sum(grepl(paste(station1Nutrient,"Transform",sep=""),names(station1Data)))>0){
varName=paste("residGAM",station1Nutrient,"Transform",sep="")
}else{
varName=paste("residGAM",station1Nutrient,sep="")
}
if(sum(grepl(paste(station2Nutrient,"Transform",sep=""),names(station2Data)))>0){
varName2=paste("residGAM",station2Nutrient,"Transform",sep="")
}else{
varName2=paste("residGAM",station2Nutrient,sep="")
}
## doesn't change results
#tryThis=prewhiten(station1Data[,varName],AR.max=2)
#tryThis2=prewhiten(station2Data[,varName2],AR.max=2)
#test=spectrum(cbind(tryThis$prew_ar,tryThis2$prew_ar),taper=.2,log="no",spans=c(16,16),demean=T,detrend=F,plot=F)
test=spectrum( cbind(station1Data[,varName],station2Data[,varName2]),taper=.2,log="no",spans=c(16,16),demean=T,detrend=F,plot=F)
pVal=1-pf(test$coh/(1-test$coh)*(test$df/2-1),2,test$df-2)[which.max(test$coh)]
maxPhase=test$phase[which.max(test$coh)]
maxCoh=test$coh[which.max(test$coh)]
maxFreq=1/test$freq[which.max(test$coh)]
return(list(maxPhase=maxPhase,maxCoh=maxCoh,maxFreq=maxFreq,pVal=pVal))
}
ccfTestFreq(D10,D12,"chl","pheo")
## $maxPhase
## [1] 1.251477
##
## $maxCoh
## [1] 0.1523391
##
## $maxFreq
## [1] 8.727273
##
## $pVal
## [1] 0.0553945
acfTestFreq=function(stationData,nutrient){
if(sum(grepl(paste(nutrient,"Transform",sep=""),names(stationData)))>0){
varName=paste("residGAM",nutrient,"Transform",sep="")
}else{
varName=paste("residGAM",nutrient,sep="")
}
test=spectrum(stationData[,varName],taper=.2,log="no",spans=c(16,16),demean=T,detrend=F,plot=F)
pVal=1-pchisq(test$spec,2)
maxSpec=test$spec[which.max(test$spec)]
maxFreq=1/test$freq[which.max(test$spec)]
pVal=pVal[which.max(test$spec)]
return(list(maxSpec=maxSpec,maxFreq=maxFreq,pVal=pVal))
}
acfTestFreq(D10,"chl")
## $maxSpec
## [1] 0.2237561
##
## $maxFreq
## [1] 4
##
## $pVal
## [1] 0.8941533
stationNames<-c("D10","D12","D22","D26","D4")
varNames<-c("chl","do","pheo","sal","temp")
allCombo=expand.grid(stationNames,stationNames,varNames,varNames)
dim(allCombo)
## [1] 625 4
head(allCombo)
## Var1 Var2 Var3 Var4
## 1 D10 D10 chl chl
## 2 D12 D10 chl chl
## 3 D22 D10 chl chl
## 4 D26 D10 chl chl
## 5 D4 D10 chl chl
## 6 D10 D12 chl chl
names(allCombo)=c("station1","station2","var1","var2")
withinStation=allCombo[which(allCombo$station1==allCombo$station2),]
dim(withinStation) ## 125
## [1] 125 4
withinStation[,1]=as.character(withinStation[,1])
withinStation[,2]=as.character(withinStation[,2])
withinStation[,3]=as.character(withinStation[,3])
withinStation[,4]=as.character(withinStation[,4])
withinStationSameVar=withinStation[which(withinStation$var1==withinStation$var2),]
dim(withinStationSameVar) ## 25
## [1] 25 4
withinStationSameVar[,1]=as.character(withinStationSameVar[,1])
withinStationSameVar[,2]=as.character(withinStationSameVar[,2])
withinStationSameVar[,3]=as.character(withinStationSameVar[,3])
withinStationSameVar[,4]=as.character(withinStationSameVar[,4])
withinStationDiffVar=withinStation[which(withinStation$var1!=withinStation$var2),]
dim(withinStationDiffVar) ## 100
## [1] 100 4
withinStationDiffVar[,1]=as.character(withinStationDiffVar[,1])
withinStationDiffVar[,2]=as.character(withinStationDiffVar[,2])
withinStationDiffVar[,3]=as.character(withinStationDiffVar[,3])
withinStationDiffVar[,4]=as.character(withinStationDiffVar[,4])
leftOver=allCombo[-which(allCombo$station1==allCombo$station2),]
acrossStationSameVar=leftOver[which(leftOver$var1==leftOver$var2),]
dim(acrossStationSameVar) ## 100
## [1] 100 4
acrossStationSameVar[,1]=as.character(acrossStationSameVar[,1])
acrossStationSameVar[,2]=as.character(acrossStationSameVar[,2])
acrossStationSameVar[,3]=as.character(acrossStationSameVar[,3])
acrossStationSameVar[,4]=as.character(acrossStationSameVar[,4])
leftOver2=leftOver[-which(leftOver$var1==leftOver$var2),]
dim(leftOver2)
## [1] 400 4
acrossStationDiffVar=leftOver2[which(leftOver2$var1!=leftOver2$var2),]
dim(acrossStationDiffVar) ## 400 same as leftOver2 as expected
## [1] 400 4
acrossStationDiffVar[,1]=as.character(acrossStationDiffVar[,1])
acrossStationDiffVar[,2]=as.character(acrossStationDiffVar[,2])
acrossStationDiffVar[,3]=as.character(acrossStationDiffVar[,3])
acrossStationDiffVar[,4]=as.character(acrossStationDiffVar[,4])
setwd("~/UC_Berkeley/Semester_4/timeSeries")
acrossStationSameVarResults<-c()
for(i in 1:nrow(acrossStationSameVar)){
res=ccfTestFreq(storeData[[acrossStationSameVar$station1[i]]],storeData[[acrossStationSameVar$station2[i]]],
as.character(acrossStationSameVar$var1[i]),as.character(acrossStationSameVar$var2[i]))
acrossStationSameVarResults<-rbind(acrossStationSameVarResults,c(res$maxPhase,res$maxCoh,res$maxFreq,res$pVal))
print(i)
}
write.csv(acrossStationSameVarResults,"acrossStationSameVarResultsFreq.csv",row.names=F)
acrossStationDiffVarResults<-c()
for(i in 1:nrow(acrossStationDiffVar)){
res= ccfTestFreq(storeData[[acrossStationDiffVar$station1[i]]],storeData[[acrossStationDiffVar$station2[i]]],
as.character(acrossStationDiffVar$var1[i]),as.character(acrossStationDiffVar$var2[i]))
acrossStationDiffVarResults<-rbind(acrossStationDiffVarResults,c(res$maxPhase,res$maxCoh,res$maxFreq,res$pVal))
print(i)
}
write.csv(acrossStationDiffVarResults,"acrossStationDiffVarResultsFreq.csv",row.names=F)
withinStationDiffVarResults<-c()
for(i in 1:nrow(withinStationDiffVar)){
res=ccfTestFreq(storeData[[withinStationDiffVar$station1[i]]],storeData[[withinStationDiffVar$station2[i]]],
as.character(withinStationDiffVar$var1[i]),as.character(withinStationDiffVar$var2[i]))
withinStationDiffVarResults<-rbind(withinStationDiffVarResults,c(res$maxPhase,res$maxCoh,res$maxFreq,res$pVal))
print(i)
}
write.csv(withinStationDiffVarResults,"withinStationDiffVarResultsFreq.csv",row.names=F)
## don't use, not part of research question
withinStationSameVarResults<-c()
for(i in 1:nrow(withinStationSameVar)){
res=acfTestFreq(storeData[[withinStationSameVar$station1[i]]],as.character(withinStationSameVar$var1[i]))
withinStationSameVarResults<-rbind(withinStationSameVarResults,c(res$maxSpec,res$maxFreq,res$pVal))
print(i)
}
write.csv(withinStationSameVarResults,"withinStationSameVarResultsFreq.csv",row.names=F)
setwd("~/UC_Berkeley/Semester_4/timeSeries")
acrossStationSameVarResults<-read.csv("acrossStationSameVarResults.csv",stringsAsFactors=F)
acrossStationDiffVarResults<-read.csv("acrossStationDiffVarResults.csv",stringsAsFactors=F)
withinStationSameVarResults<-read.csv("withinStationSameVarResults.csv",stringsAsFactors=F)
withinStationDiffVarResults<-read.csv("withinStationDiffVarResults.csv",stringsAsFactors=F)
names(acrossStationSameVarResults)=names(acrossStationDiffVarResults)=names(withinStationSameVarResults)=
names(withinStationDiffVarResults)=c("maxLag","pVal","minPval")
## first check to make sure we have availability to get significant p-values
summary(acrossStationSameVarResults$minPval) ## 0.005376 for all is the max min pval, so we are good
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.005376 0.005376 0.005376 0.005376 0.005376 0.005376
summary(acrossStationDiffVarResults$minPval)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.005376 0.005376 0.005376 0.005376 0.005376 0.005376
summary(withinStationSameVarResults$minPval)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.005376 0.005376 0.005376 0.005376 0.005376 0.005376
summary(withinStationDiffVarResults$minPval)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.005376 0.005376 0.005376 0.005376 0.005376 0.005376
length(which(acrossStationSameVarResults$pVal<0.05)) ## 18
## [1] 18
nrow(acrossStationSameVarResults) ## 100
## [1] 100
length(which(acrossStationDiffVarResults$pVal<0.05)) ## 83
## [1] 83
nrow(acrossStationDiffVarResults) ## 400
## [1] 400
length(which(withinStationSameVarResults$pVal<0.05)) ## 2
## [1] 2
nrow(withinStationSameVarResults) ## 25
## [1] 25
length(which(withinStationDiffVarResults$pVal<0.05)) ## 23
## [1] 23
nrow(withinStationDiffVarResults) ## 100
## [1] 100
sum(p.adjust(acrossStationSameVarResults$pVal, method = "BY") <0.05)## corrected p-val
## [1] 3
## 3
sigToPlot=acrossStationSameVar[which(p.adjust(acrossStationSameVarResults$pVal, method = "BY") <0.05),]
resToPlot=acrossStationSameVarResults[which(p.adjust(acrossStationSameVarResults$pVal, method = "BY") <0.05),]
sigToPlot
## station1 station2 var1 var2
## 153 D22 D10 do do
## 464 D26 D22 sal sal
## 620 D4 D26 temp temp
resToPlot
## maxLag pVal minPval
## 22 6 0 0.005376344
## 71 1 0 0.005376344
## 96 6 0 0.005376344
sum(p.adjust(acrossStationDiffVarResults$pVal, method = "BY") <0.05)## corrected p-val
## [1] 8
## 8
sigToPlot=acrossStationDiffVar[which(p.adjust(acrossStationDiffVarResults$pVal, method = "BY") <0.05),]
resToPlot=acrossStationDiffVarResults[which(p.adjust(acrossStationDiffVarResults$pVal, method = "BY") <0.05),]
sigToPlot
## station1 station2 var1 var2
## 36 D10 D22 do chl
## 89 D26 D22 sal chl
## 206 D10 D12 sal do
## 240 D4 D22 temp do
## 241 D10 D26 temp do
## 440 D4 D22 pheo sal
## 449 D26 D4 pheo sal
## 547 D12 D4 do temp
resToPlot
## maxLag pVal minPval
## 9 2 0 0.005376344
## 51 2 0 0.005376344
## 125 4 0 0.005376344
## 152 4 0 0.005376344
## 153 7 0 0.005376344
## 292 3 0 0.005376344
## 300 3 0 0.005376344
## 358 8 0 0.005376344
sum(p.adjust(withinStationDiffVarResults$pVal, method = "BY") <0.05)## corrected p-val
## [1] 1
## 1
sigToPlot=withinStationDiffVar[which(p.adjust(withinStationDiffVarResults$pVal, method = "BY") <0.05),]
resToPlot=withinStationDiffVarResults[which(p.adjust(withinStationDiffVarResults$pVal, method = "BY") <0.05),]
sigToPlot
## station1 station2 var1 var2
## 532 D12 D12 do temp
resToPlot
## maxLag pVal minPval
## 87 8 0 0.005376344
setwd("~/UC_Berkeley/Semester_4/timeSeries")
acrossStationSameVarResults<-read.csv("acrossStationSameVarResultsFreq.csv",stringsAsFactors=F)
acrossStationDiffVarResults<-read.csv("acrossStationDiffVarResultsFreq.csv",stringsAsFactors=F)
#withinStationSameVarResults<-read.csv("withinStationSameVarResultsFreq.csv",stringsAsFactors=F)
withinStationDiffVarResults<-read.csv("withinStationDiffVarResultsFreq.csv",stringsAsFactors=F)
names(acrossStationSameVarResults)=names(acrossStationDiffVarResults)=names(withinStationDiffVarResults)=c("maxPhase","maxCoh","maxFreq","pVal")
length(which(acrossStationSameVarResults$pVal<0.05)) ## 100 pw 100
## [1] 100
nrow(acrossStationSameVarResults) ## 100
## [1] 100
hist(acrossStationSameVarResults$pVal)
length(which(acrossStationDiffVarResults$pVal<0.05)) ## 266 pw 262
## [1] 266
nrow(acrossStationDiffVarResults) ## 400
## [1] 400
hist(acrossStationDiffVarResults$pVal)
length(which(withinStationDiffVarResults$pVal<0.05)) ## 64 pw 64
## [1] 64
nrow(withinStationDiffVarResults) ## 100
## [1] 100
hist(withinStationDiffVarResults$pVal)
sum(p.adjust(acrossStationSameVarResults$pVal, method = "BY") <0.05)## corrected p-val
## [1] 90
## 90 pw 92
sigToPlot=acrossStationSameVar[which(p.adjust(acrossStationSameVarResults$pVal, method = "BY") <0.05),]
resToPlot=acrossStationSameVarResults[which(p.adjust(acrossStationSameVarResults$pVal, method = "BY") <0.05),]
sigToPlot
## station1 station2 var1 var2
## 2 D12 D10 chl chl
## 3 D22 D10 chl chl
## 4 D26 D10 chl chl
## 5 D4 D10 chl chl
## 6 D10 D12 chl chl
## 8 D22 D12 chl chl
## 9 D26 D12 chl chl
## 10 D4 D12 chl chl
## 11 D10 D22 chl chl
## 12 D12 D22 chl chl
## 14 D26 D22 chl chl
## 15 D4 D22 chl chl
## 16 D10 D26 chl chl
## 17 D12 D26 chl chl
## 18 D22 D26 chl chl
## 20 D4 D26 chl chl
## 21 D10 D4 chl chl
## 22 D12 D4 chl chl
## 23 D22 D4 chl chl
## 24 D26 D4 chl chl
## 152 D12 D10 do do
## 153 D22 D10 do do
## 154 D26 D10 do do
## 155 D4 D10 do do
## 156 D10 D12 do do
## 158 D22 D12 do do
## 159 D26 D12 do do
## 160 D4 D12 do do
## 161 D10 D22 do do
## 162 D12 D22 do do
## 164 D26 D22 do do
## 165 D4 D22 do do
## 166 D10 D26 do do
## 167 D12 D26 do do
## 168 D22 D26 do do
## 170 D4 D26 do do
## 171 D10 D4 do do
## 172 D12 D4 do do
## 173 D22 D4 do do
## 174 D26 D4 do do
## 302 D12 D10 pheo pheo
## 303 D22 D10 pheo pheo
## 305 D4 D10 pheo pheo
## 306 D10 D12 pheo pheo
## 308 D22 D12 pheo pheo
## 310 D4 D12 pheo pheo
## 311 D10 D22 pheo pheo
## 312 D12 D22 pheo pheo
## 314 D26 D22 pheo pheo
## 315 D4 D22 pheo pheo
## 318 D22 D26 pheo pheo
## 320 D4 D26 pheo pheo
## 321 D10 D4 pheo pheo
## 322 D12 D4 pheo pheo
## 323 D22 D4 pheo pheo
## 324 D26 D4 pheo pheo
## 452 D12 D10 sal sal
## 453 D22 D10 sal sal
## 455 D4 D10 sal sal
## 456 D10 D12 sal sal
## 458 D22 D12 sal sal
## 459 D26 D12 sal sal
## 460 D4 D12 sal sal
## 461 D10 D22 sal sal
## 462 D12 D22 sal sal
## 465 D4 D22 sal sal
## 467 D12 D26 sal sal
## 471 D10 D4 sal sal
## 472 D12 D4 sal sal
## 473 D22 D4 sal sal
## 602 D12 D10 temp temp
## 603 D22 D10 temp temp
## 604 D26 D10 temp temp
## 605 D4 D10 temp temp
## 606 D10 D12 temp temp
## 608 D22 D12 temp temp
## 609 D26 D12 temp temp
## 610 D4 D12 temp temp
## 611 D10 D22 temp temp
## 612 D12 D22 temp temp
## 614 D26 D22 temp temp
## 615 D4 D22 temp temp
## 616 D10 D26 temp temp
## 617 D12 D26 temp temp
## 618 D22 D26 temp temp
## 620 D4 D26 temp temp
## 621 D10 D4 temp temp
## 622 D12 D4 temp temp
## 623 D22 D4 temp temp
## 624 D26 D4 temp temp
resToPlot
## maxPhase maxCoh maxFreq pVal
## 1 -1.614801e-01 0.4997432 4.363636 5.421407e-06
## 2 6.263012e-03 0.4735768 192.000000 1.323509e-05
## 3 -6.035021e-01 0.4704407 4.571429 1.468534e-05
## 4 -2.013778e-02 0.7349544 192.000000 8.030165e-11
## 5 1.614801e-01 0.4997432 4.363636 5.421407e-06
## 6 8.386601e-17 0.2905206 2.000000 2.457938e-03
## 7 -3.834747e-01 0.4161474 13.714286 8.108387e-05
## 8 3.003807e-02 0.6125318 4.682927 6.190318e-08
## 9 -6.263012e-03 0.4735768 192.000000 1.323509e-05
## 10 -1.536679e-16 0.2905206 2.000000 2.457938e-03
## 11 -2.742308e-01 0.2691103 2.865672 4.136264e-03
## 12 -2.505597e-02 0.5935356 192.000000 1.430868e-07
## 13 6.035021e-01 0.4704407 4.571429 1.468534e-05
## 14 3.834747e-01 0.4161474 13.714286 8.108387e-05
## 15 2.742308e-01 0.2691103 2.865672 4.136264e-03
## 16 5.987012e-01 0.5758708 3.918367 3.013217e-07
## 17 2.013778e-02 0.7349544 192.000000 8.030165e-11
## 18 -3.003807e-02 0.6125318 4.682927 6.190318e-08
## 19 2.505597e-02 0.5935356 192.000000 1.430868e-07
## 20 -5.987012e-01 0.5758708 3.918367 3.013217e-07
## 21 -7.752784e-02 0.7954245 16.000000 8.627543e-13
## 22 -9.803885e-02 0.7799103 8.347826 3.101963e-12
## 23 1.924453e-01 0.5921634 10.666667 1.517827e-07
## 24 -6.373451e-02 0.8672090 12.800000 4.440892e-16
## 25 7.752784e-02 0.7954245 16.000000 8.627543e-13
## 26 2.955340e-01 0.6851673 2.430380 1.634745e-09
## 27 1.299838e-01 0.5290375 11.294118 1.885132e-06
## 28 -2.502953e-02 0.7420414 12.000000 4.996537e-11
## 29 9.803885e-02 0.7799103 8.347826 3.101963e-12
## 30 -2.955340e-01 0.6851673 2.430380 1.634745e-09
## 31 3.474642e-01 0.5801154 12.800000 2.526745e-07
## 32 2.915141e-02 0.8602990 10.666667 1.110223e-15
## 33 -1.924453e-01 0.5921634 10.666667 1.517827e-07
## 34 -1.299838e-01 0.5290375 11.294118 1.885132e-06
## 35 -3.474642e-01 0.5801154 12.800000 2.526745e-07
## 36 -3.543822e-01 0.6098341 10.666667 6.989779e-08
## 37 6.373451e-02 0.8672090 12.800000 4.440892e-16
## 38 2.502953e-02 0.7420414 12.000000 4.996537e-11
## 39 -2.915141e-02 0.8602990 10.666667 1.110223e-15
## 40 3.543822e-01 0.6098341 10.666667 6.989779e-08
## 41 8.321366e-03 0.5811057 192.000000 2.424428e-07
## 42 -1.561057e-02 0.5732685 192.000000 3.353791e-07
## 44 1.082323e-01 0.4532215 12.800000 2.571347e-05
## 45 -8.321366e-03 0.5811057 192.000000 2.424428e-07
## 46 -7.617191e-01 0.3522855 10.666667 4.990069e-04
## 48 -1.306446e-01 0.4990255 11.294118 5.559183e-06
## 49 1.561057e-02 0.5732685 192.000000 3.353791e-07
## 50 7.617191e-01 0.3522855 10.666667 4.990069e-04
## 51 -1.177906e-02 0.3005925 192.000000 1.913675e-03
## 52 3.093948e-01 0.4842186 6.193548 9.256798e-06
## 55 1.177906e-02 0.3005925 192.000000 1.913675e-03
## 56 7.132110e-01 0.3179848 4.465116 1.231461e-03
## 57 -1.082323e-01 0.4532215 12.800000 2.571347e-05
## 58 1.306446e-01 0.4990255 11.294118 5.559183e-06
## 59 -3.093948e-01 0.4842186 6.193548 9.256798e-06
## 60 -7.132110e-01 0.3179848 4.465116 1.231461e-03
## 61 -2.848852e-01 0.8453521 6.620690 6.439294e-15
## 62 2.655789e-01 0.5182194 5.818182 2.805440e-06
## 64 -1.614712e-01 0.8879604 6.857143 0.000000e+00
## 65 2.848852e-01 0.8453521 6.620690 6.439294e-15
## 66 -4.474455e-16 0.6570138 2.000000 7.321721e-09
## 67 6.537201e-03 0.3824503 192.000000 2.165359e-04
## 68 2.036028e-01 0.8218177 9.142857 7.682743e-14
## 69 -2.655789e-01 0.5182194 5.818182 2.805440e-06
## 70 -5.435023e-16 0.6570138 2.000000 7.321721e-09
## 72 -2.188512e-01 0.5963865 2.370370 1.264987e-07
## 74 -6.537201e-03 0.3824503 192.000000 2.165359e-04
## 77 1.614712e-01 0.8879604 6.857143 0.000000e+00
## 78 -2.036028e-01 0.8218177 9.142857 7.682743e-14
## 79 2.188512e-01 0.5963865 2.370370 1.264987e-07
## 81 -1.306025e-01 0.8755150 96.000000 2.220446e-16
## 82 -6.145561e-02 0.8279414 19.200000 4.174439e-14
## 83 -1.827559e-01 0.9282613 38.400000 0.000000e+00
## 84 -1.118319e-01 0.9404211 24.000000 0.000000e+00
## 85 1.306025e-01 0.8755150 96.000000 2.220446e-16
## 86 1.114711e-01 0.7349132 4.800000 8.052048e-11
## 87 -5.574506e-05 0.9272318 96.000000 0.000000e+00
## 88 6.880169e-02 0.9204743 96.000000 0.000000e+00
## 89 6.145561e-02 0.8279414 19.200000 4.174439e-14
## 90 -1.114711e-01 0.7349132 4.800000 8.052048e-11
## 91 -1.588070e-01 0.8319947 64.000000 2.753353e-14
## 92 -3.018674e-02 0.8831900 19.200000 0.000000e+00
## 93 1.827559e-01 0.9282613 38.400000 0.000000e+00
## 94 5.574506e-05 0.9272318 96.000000 0.000000e+00
## 95 1.588070e-01 0.8319947 64.000000 2.753353e-14
## 96 9.053966e-02 0.9466961 64.000000 0.000000e+00
## 97 1.118319e-01 0.9404211 24.000000 0.000000e+00
## 98 -6.880169e-02 0.9204743 96.000000 0.000000e+00
## 99 3.018674e-02 0.8831900 19.200000 0.000000e+00
## 100 -9.053966e-02 0.9466961 64.000000 0.000000e+00
sum(p.adjust(acrossStationDiffVarResults$pVal, method = "BY") <0.05)## corrected p-val
## [1] 60
sigToPlot=acrossStationDiffVar[which(p.adjust(acrossStationDiffVarResults$pVal, method = "BY") <0.05),]
resToPlot=acrossStationDiffVarResults[which(p.adjust(acrossStationDiffVarResults$pVal, method = "BY") <0.05),]
sigToPlot
## station1 station2 var1 var2
## 37 D12 D22 do chl
## 60 D4 D12 pheo chl
## 65 D4 D22 pheo chl
## 77 D12 D10 sal chl
## 78 D22 D10 sal chl
## 97 D12 D4 sal chl
## 98 D22 D4 sal chl
## 133 D22 D12 chl do
## 210 D4 D12 sal do
## 227 D12 D10 temp do
## 228 D22 D10 temp do
## 229 D26 D10 temp do
## 230 D4 D10 temp do
## 231 D10 D12 temp do
## 233 D22 D12 temp do
## 234 D26 D12 temp do
## 235 D4 D12 temp do
## 236 D10 D22 temp do
## 237 D12 D22 temp do
## 239 D26 D22 temp do
## 240 D4 D22 temp do
## 245 D4 D26 temp do
## 246 D10 D4 temp do
## 247 D12 D4 temp do
## 248 D22 D4 temp do
## 249 D26 D4 temp do
## 272 D12 D4 chl pheo
## 273 D22 D4 chl pheo
## 336 D10 D22 sal pheo
## 337 D12 D22 sal pheo
## 340 D4 D22 sal pheo
## 381 D10 D12 chl sal
## 385 D4 D12 chl sal
## 386 D10 D22 chl sal
## 390 D4 D22 chl sal
## 422 D12 D4 do sal
## 428 D22 D10 pheo sal
## 433 D22 D12 pheo sal
## 448 D22 D4 pheo sal
## 491 D10 D26 temp sal
## 495 D4 D26 temp sal
## 527 D12 D10 do temp
## 528 D22 D10 do temp
## 530 D4 D10 do temp
## 531 D10 D12 do temp
## 533 D22 D12 do temp
## 535 D4 D12 do temp
## 536 D10 D22 do temp
## 537 D12 D22 do temp
## 540 D4 D22 do temp
## 541 D10 D26 do temp
## 542 D12 D26 do temp
## 543 D22 D26 do temp
## 545 D4 D26 do temp
## 546 D10 D4 do temp
## 547 D12 D4 do temp
## 548 D22 D4 do temp
## 549 D26 D4 do temp
## 579 D26 D10 sal temp
## 599 D26 D4 sal temp
resToPlot
## maxPhase maxCoh maxFreq pVal
## 10 -0.6223666 0.3717467 3.368421 2.925321e-04
## 28 -0.3005707 0.3852084 5.485714 2.002159e-04
## 32 0.0459174 0.5442074 2.064516 1.062734e-06
## 41 3.1385178 0.4330774 192.000000 4.844168e-05
## 42 3.1018280 0.3212908 192.000000 1.131038e-03
## 58 -3.1226113 0.4864547 192.000000 8.578843e-06
## 59 3.1205941 0.5968667 192.000000 1.238900e-07
## 86 0.6223666 0.3717467 3.368421 2.925321e-04
## 128 -0.8580222 0.3780607 6.400000 2.451202e-04
## 141 -3.1415927 0.5101513 2.000000 3.752020e-06
## 142 -3.0575810 0.4698239 8.347826 1.498769e-05
## 143 -2.9996736 0.4979480 8.347826 5.772256e-06
## 144 -3.0279939 0.5271913 7.111111 2.018767e-06
## 145 -3.0882514 0.3788031 13.714286 2.400485e-04
## 146 -3.0440486 0.4072159 12.000000 1.057685e-04
## 147 2.8842902 0.4449997 2.742857 3.339104e-05
## 148 3.0020436 0.4313440 2.742857 5.110095e-05
## 149 -3.1415927 0.5047162 2.000000 4.551507e-06
## 150 3.1415927 0.4936105 2.000000 6.710374e-06
## 151 -3.1415927 0.4774100 2.000000 1.164568e-05
## 152 -3.1415927 0.6850216 2.000000 1.648045e-09
## 156 2.9485047 0.4048876 2.704225 1.132816e-04
## 157 -3.0609952 0.4760218 12.800000 1.219926e-05
## 158 3.1415927 0.4672412 2.000000 1.631854e-05
## 159 -3.0975206 0.4584543 8.347826 2.172956e-05
## 160 -3.1415927 0.5404287 2.000000 1.227987e-06
## 178 0.3005707 0.3852084 5.485714 2.002159e-04
## 179 -0.0459174 0.5442074 2.064516 1.062734e-06
## 209 -0.6312983 0.3602177 12.800000 4.021844e-04
## 210 -1.0720948 0.4046958 14.769231 1.139224e-04
## 212 -0.6994686 0.3736429 12.800000 2.774552e-04
## 245 -3.1385178 0.4330774 192.000000 4.844168e-05
## 248 3.1226113 0.4864547 192.000000 8.578843e-06
## 249 -3.1018280 0.3212908 192.000000 1.131038e-03
## 252 -3.1205941 0.5968667 192.000000 1.238900e-07
## 278 0.8580222 0.3780607 6.400000 2.451202e-04
## 282 0.6312983 0.3602177 12.800000 4.021844e-04
## 286 1.0720948 0.4046958 14.769231 1.139224e-04
## 299 0.6994686 0.3736429 12.800000 2.774552e-04
## 313 2.5032456 0.3772504 12.800000 2.507716e-04
## 316 2.3215352 0.3481116 12.800000 5.583948e-04
## 341 3.0882514 0.3788031 13.714286 2.400485e-04
## 342 3.1415927 0.5047162 2.000000 4.551507e-06
## 344 3.0609952 0.4760218 12.800000 1.219926e-05
## 345 3.1415927 0.5101513 2.000000 3.752020e-06
## 346 3.1415927 0.4936105 2.000000 6.710374e-06
## 348 -3.1415927 0.4672412 2.000000 1.631854e-05
## 349 3.0575810 0.4698239 8.347826 1.498769e-05
## 350 3.0440486 0.4072159 12.000000 1.057685e-04
## 352 3.0975206 0.4584543 8.347826 2.172956e-05
## 353 2.9996736 0.4979480 8.347826 5.772256e-06
## 354 -2.8842902 0.4449997 2.742857 3.339104e-05
## 355 -3.1415927 0.4774100 2.000000 1.164568e-05
## 356 -3.1415927 0.5404287 2.000000 1.227987e-06
## 357 3.0279939 0.5271913 7.111111 2.018767e-06
## 358 -3.0020436 0.4313440 2.742857 5.110095e-05
## 359 3.1415927 0.6850216 2.000000 1.648045e-09
## 360 -2.9485047 0.4048876 2.704225 1.132816e-04
## 383 -2.5032456 0.3772504 12.800000 2.507716e-04
## 400 -2.3215352 0.3481116 12.800000 5.583948e-04
sum(p.adjust(withinStationDiffVarResults$pVal, method = "BY") <0.05)## corrected p-val
## [1] 26
sigToPlot=withinStationDiffVar[which(p.adjust(withinStationDiffVarResults$pVal, method = "BY") <0.05),]
resToPlot=withinStationDiffVarResults[which(p.adjust(withinStationDiffVarResults$pVal, method = "BY") <0.05),]
sigToPlot
## station1 station2 var1 var2
## 51 D10 D10 pheo chl
## 57 D12 D12 pheo chl
## 63 D22 D22 pheo chl
## 69 D26 D26 pheo chl
## 75 D4 D4 pheo chl
## 207 D12 D12 sal do
## 226 D10 D10 temp do
## 232 D12 D12 temp do
## 238 D22 D22 temp do
## 244 D26 D26 temp do
## 250 D4 D4 temp do
## 251 D10 D10 chl pheo
## 257 D12 D12 chl pheo
## 263 D22 D22 chl pheo
## 269 D26 D26 chl pheo
## 275 D4 D4 chl pheo
## 363 D22 D22 temp pheo
## 407 D12 D12 do sal
## 494 D26 D26 temp sal
## 526 D10 D10 do temp
## 532 D12 D12 do temp
## 538 D22 D22 do temp
## 544 D26 D26 do temp
## 550 D4 D4 do temp
## 563 D22 D22 pheo temp
## 594 D26 D26 sal temp
resToPlot
## maxPhase maxCoh maxFreq pVal
## 6 0.03230866 0.4691372 2.704225 1.533119e-05
## 7 0.07122826 0.3280448 3.622642 9.493838e-04
## 8 -0.29063634 0.4842777 5.818182 9.238245e-06
## 9 -0.22025085 0.3208126 24.000000 1.145071e-03
## 10 -0.47971020 0.5922786 5.647059 1.510341e-07
## 32 -0.93897895 0.2945916 7.111111 2.222397e-03
## 36 -3.00530632 0.4878493 12.000000 8.180023e-06
## 37 -3.03138569 0.3923411 2.086957 1.632216e-04
## 38 -3.10075220 0.4984476 7.111111 5.672513e-06
## 39 -2.93436039 0.4326341 6.193548 4.910905e-05
## 40 -3.14159265 0.6418548 2.000000 1.561047e-08
## 41 -0.03230866 0.4691372 2.704225 1.533119e-05
## 42 -0.07122826 0.3280448 3.622642 9.493838e-04
## 43 0.29063634 0.4842777 5.818182 9.238245e-06
## 44 0.22025085 0.3208126 24.000000 1.145071e-03
## 45 0.47971020 0.5922786 5.647059 1.510341e-07
## 58 -1.42010544 0.3314213 2.341463 8.692469e-04
## 67 0.93897895 0.2945916 7.111111 2.222397e-03
## 79 2.22431284 0.3009051 12.800000 1.898757e-03
## 86 3.00530632 0.4878493 12.000000 8.180023e-06
## 87 3.03138569 0.3923411 2.086957 1.632216e-04
## 88 3.10075220 0.4984476 7.111111 5.672513e-06
## 89 2.93436039 0.4326341 6.193548 4.910905e-05
## 90 -3.14159265 0.6418548 2.000000 1.561047e-08
## 93 1.42010544 0.3314213 2.341463 8.692469e-04
## 99 -2.22431284 0.3009051 12.800000 1.898757e-03